import jieba


def _load_tokenized_titles(path):
    """Read a UTF-8 text file and tokenize each of its lines with jieba.

    Args:
        path: Path of the text file to read; it is processed line by line.

    Returns:
        A list with one entry per line of the file. Each entry is the list
        of tokens jieba produced for that line after surrounding whitespace
        (including the trailing newline) was stripped.
    """
    with open(path, encoding="utf-8", mode="r") as f:
        # jieba.cut yields tokens lazily; jieba.lcut returns the same
        # segmentation already materialized as a list.
        return [jieba.lcut(line.strip()) for line in f]


# Tokenized titles: one list of tokens per input line.
academy_titles = _load_tokenized_titles("academy_titles.txt")
job_titles = _load_tokenized_titles("job_titles.txt")


# Peek at the first five tokenized academy titles. As a bare expression this
# only displays a value in an interactive session (REPL/notebook); when the
# file is run as a script the result is computed and discarded.
academy_titles[:5]


# Vocabulary: every distinct token that occurs in either tokenized corpus.
word_set = set()
for corpus in (academy_titles, job_titles):
    for tokens in corpus:
        # Each entry is a list of tokens; merge all of them in one call.
        word_set.update(tokens)


# Report the vocabulary size.
print(len(word_set))



